This is an R Markdown Notebook associated with the following manuscript: A recombinant mapping yeast population identifies mitotic growth loci influencing mtDNA instability through mitonuclear interactions Tuc H.M. Nguyen, Austen Tinz-Burdick, Meghan Lenhardt, Margaret Geertz, Franchesca Ramirez, Mark Schwartz, Michael Toledano1, Brooke Bonney, Benjamin Gaebler, Weiwei Liu, John F. Wolters, Ken Chiu, Anthony C. Fiumera, Heather L. Fiumera
Final figures edited in Inkscape.
libraries<-c("car", "dplyr", "ggplot2", "ggpubr", "multcomp", "multcompView","tidyverse", "mixlm")
lapply(libraries, require, character.only = TRUE )
## Loading required package: car
## Loading required package: carData
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
## Loading required package: ggpubr
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## Loading required package: multcompView
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.6 ✔ purrr 0.3.4
## ✔ tidyr 1.1.4 ✔ stringr 1.4.0
## ✔ readr 2.1.1 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ MASS::select() masks dplyr::select()
## ✖ purrr::some() masks car::some()
## Loading required package: mixlm
##
## Attaching package: 'mixlm'
## The following object is masked from 'package:dplyr':
##
## tally
## The following objects are masked from 'package:stats':
##
## glm, lm
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
##
## [[7]]
## [1] TRUE
##
## [[8]]
## [1] TRUE
#import data
data.wildisolates<-read.csv("parental_pet.csv", header = TRUE, stringsAsFactors = FALSE)
data.S288c<-read.csv("S288c_pet.csv", header = TRUE, stringsAsFactors = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on 'S288c_pet.csv'
#set factors
data.wildisolates$StrainID<-as.factor(data.wildisolates$StrainID)
data.wildisolates$Nuclear<-as.factor(data.wildisolates$Nuclear)
data.wildisolates$NucPop<-as.factor(data.wildisolates$NucPop)
data.wildisolates$Mito<-as.factor(data.wildisolates$Mito)
data.wildisolates$MitoPop<-as.factor(data.wildisolates$MitoPop)
data.wildisolates$og_v_syn<-as.factor(data.wildisolates$og_v_syn)
data.wildisolates$Genotype<-as.factor(data.wildisolates$Genotype)
#set classes
data.wildisolates$Petite<-as.numeric(data.wildisolates$Petite)
data.wildisolates$Grande<-as.numeric(data.wildisolates$Grande)
data.wildisolates$Total<-as.numeric(data.wildisolates$Total)
Petite frequencies of S. cerevisiae wild isolates as boxplots. The populations as broadly defined in PMCID: 2659681, 6784862, and 4417123
#reorganize isolates for a certain order
data.wildisolates$Nuclear <- factor(data.wildisolates$Nuclear, levels=c("378604X","273614N","322134S","YJM981","YJM975","YJM978","BC187","DBVPG1373","L-1528","L-1374","DBVPG6765","YPS606","YPS128","UWOPS03-461.4","UWOPS05-227.2","UWOPS05-217.3","Y12","Y55","SK1","NCYC110","DBVPG6044"))
#set colorblind palette
cbbPalette2 <- c("WestAfrican" = "#E69F00","Sake" ="#D55E00","Malaysian"= "#CC79A7", "NorthAmerican"="#009E73","European"="#56B4E9" ,"mosaic"="#999999")
Fig1<-ggplot(data.wildisolates,aes(x=Nuclear,y=PetFreq,fill=NucPop))+geom_boxplot()+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 10)) +scale_fill_manual(values=cbbPalette2)+
scale_y_continuous(breaks=seq(0,12,2))+
theme(legend.position = c(0.8,0.5))+
guides(fill=guide_legend(title="Population"))+
coord_flip()
Fig1
Figure 1 insert for S288c
#added as an insert in Inkscape
#plotted in the same x axis spacing as above
bp_S288c<-ggplot(data.S288c,aes(x=Nuclear,y=PetFreq,fill=NucPop))+geom_boxplot()+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 10)) +scale_fill_manual(values=cbbPalette2)+
ylim(42,60)+
theme(legend.position = "none") +coord_flip()
bp_S288c
#### Table S2: ANOVA #### Petite frequencies vary by strain and population
Are the differences in petite frequencies due to genetic variation in individuals isolates and/or populations?
#The different numbers of petite/grande colonies counted in each assay are taken into account using cbind function
#the family=binomial accounts for non-normal distribution of petite frequencies
#the variable "Nuclear" is used instead of "StrainID" because there are both mata and x versions of some strains
FM.cbind<-glm(cbind(data.wildisolates$Petite,data.wildisolates$Grande)~data.wildisolates$NucPop/data.wildisolates$Nuclear, family=binomial)
anova(FM.cbind, test="Chisq")
pop.means<-data.frame(tapply(predict(FM.cbind, type = "response"), data.wildisolates$NucPop, mean))
pop.means
This section contains the analyses described in Figure 2A and B and Table S3.
Analyzes the petite frequencies of strains with the Y55 nuclear genotype with different mtDNAs
data.Y55<-read.csv("data.Y55.csv", header = TRUE, stringsAsFactors = FALSE)
#set factors
data.Y55$StrainID<-as.factor(data.Y55$StrainID)
data.Y55$Nuclear<-as.factor(data.Y55$Nuclear)
data.Y55$NucPop<-as.factor(data.Y55$NucPop)
data.Y55$Mito<-as.factor(data.Y55$Mito)
data.Y55$MitoPop<-as.factor(data.Y55$MitoPop)
data.Y55$og_v_syn<-as.factor(data.Y55$og_v_syn)
data.Y55$Genotype<-as.factor(data.Y55$Genotype)
#set classes
data.Y55$Petite<-as.numeric(data.Y55$Petite)
data.Y55$Grande<-as.numeric(data.Y55$Grande)
data.Y55$Total<-as.numeric(data.Y55$Total)
aggregate(PetFreq~Mito,data=data.Y55,mean)
#Table S3 anova... Does mtDNA variation contribute to petite frequencies?
FM.cbind2<-glm(cbind(data.Y55$Petite,data.Y55$Grande)~data.Y55$Mito, family=binomial)
anova(FM.cbind2, test="Chisq")
mtDNA analyses and correlations
This sections contains the analyses for Fig 2A,2B and correlations described in text
data.mtDNA_full<-read.table("nY55_pet_gc_intron_3.txt", header = TRUE, stringsAsFactors = FALSE)
data.mtDNA_full$log2_pm<-log2(data.mtDNA_full$petfreq_mean)
#subset data to just those that have pet freq in the Y55 mtDNA for analyzed mtDNAs
data.mtDNA_Y55nuc<-data.mtDNA_full%>%dplyr::select(petfreq_mean,log2_pm, mtDNA,mtPop,Length,GC_percent,TotalGCcount,total_intronlength,M1,M2,M3,M4,M1p,M2p,M2pp,G,V,U)
#remove rows with missing values
data.mtDNA_Y55nuc_complete<-data.mtDNA_Y55nuc[complete.cases(data.mtDNA_Y55nuc),]
#test to see if normally distributed
hist(data.mtDNA_Y55nuc_complete$log2_pm)
shapiro.test(data.mtDNA_Y55nuc_complete$log2_pm)
##
## Shapiro-Wilk normality test
##
## data: data.mtDNA_Y55nuc_complete$log2_pm
## W = 0.9448, p-value = 0.3796
Plots the petite frequencies of strains with the Y55 nuclear genotype and different mtDNAs vs. mtDNA GC content
cols2<-c("WineEuro"= "blue","Malaysian" ="#E69F00","mosaic"="#999999", "NthAm"="#009E73", "WestAfr" = "#F0E442", "Sake" = "#D55E00")
Fig2A<-ggplot(data=data.mtDNA_Y55nuc_complete, aes(x = GC_percent, y =petfreq_mean)) +
geom_point(aes(color=mtPop),size=3.5) +
scale_color_manual(values=cols2)+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 10))+
theme(legend.position = "none")+
geom_smooth(method = "lm", se = FALSE, color="black")+
labs(x="mtDNA GC%",y="PetFreq")+theme(legend.position="none")
Fig2A
## `geom_smooth()` using formula 'y ~ x'
#### Correlation for Fig 2A: Correlation between petite frequency and GC percent
RM.GCp<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$GC_percent)
anova(RM.GCp) #GC_percent p =0.01341 *
summary(RM.GCp) #R^2 = 0.3435
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$GC_percent)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2315 -2.2226 -0.3326 1.2660 13.2191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -63.582 26.897 -2.364 0.0320 *
## data.mtDNA_Y55nuc_complete$GC_percent 4.626 1.651 2.802 0.0134 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 5.656 on 15 degrees of freedom
## Multiple R-squared: 0.3435,
## Adjusted R-squared: 0.2998
## F-statistic: 7.85 on 1 and 15 DF, p-value: 0.01341
Anova(RM.GCp) #GC_percent p =0.01341
cor.test.gcppet<-cor.test(data.mtDNA_Y55nuc_complete$GC_percent,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.gcppet #r=0.5861
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$GC_percent and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 2.8017, df = 15, p-value = 0.01341
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1468400 0.8322935
## sample estimates:
## cor
## 0.5861178
cor.test.gcppet$p.value #GCpercent p = 0.01341*
## [1] 0.01341314
cor.test.gcppet.p<-round(cor.test.gcppet$p.value,3)
cor.test.gcppet.p #p=0.013 *
## [1] 0.013
cor.test.gcppet$estimate
## cor
## 0.5861178
cor.test.gcpet.r<-round(cor.test.gcppet$estimate)
cor.test.gcpet.r
## cor
## 1
gcppet.r<-round(cor.test.gcppet$estimate,2)
gcppet.r2<-round(cor.test.gcppet$estimate^2,2)
gcppet.r
## cor
## 0.59
gcppet.r2
## cor
## 0.34
Plots the petite frequencies of strains with the Y55 nuclear genotype and different mtDNAs vs. the numbers of M4-type GC clusters
Fig2B<-ggplot(data=data.mtDNA_Y55nuc_complete, aes(x = M4, y =petfreq_mean)) +
geom_point(aes(color=mtPop),size=3.5) +
scale_color_manual(values=cols2)+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 10))+
theme(legend.position = "none")+
geom_smooth(method = "lm", se = FALSE, color="black")+
labs(x="total M4-type GC clusters",y="PetFreq")+theme(legend.position="none")
Fig2B
## `geom_smooth()` using formula 'y ~ x'
RM.M4<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M4)
anova(RM.M4) #p= 0.003568 **
summary(RM.M4) #R^2 = 0.4425
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.832 -4.068 -1.219 1.880 11.004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.2824 1.6020 5.170 0.000114 ***
## data.mtDNA_Y55nuc_complete$M4 0.5345 0.1549 3.451 0.003568 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 5.212 on 15 degrees of freedom
## Multiple R-squared: 0.4425,
## Adjusted R-squared: 0.4053
## F-statistic: 11.91 on 1 and 15 DF, p-value: 0.003568
Anova(RM.M4) #M4 p=
cor.test.m4<-cor.test(data.mtDNA_Y55nuc_complete$M4,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m4 #r=0.0.66521
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M4 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 3.4505, df = 15, p-value = 0.003568
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2713116 0.8682496
## sample estimates:
## cor
## 0.66521
cor.test.m4$p.value #M4 p =
## [1] 0.003568085
cor.test.m4.p<-round(cor.test.m4$p.value,3)
cor.test.m4.p #m4 p=0.004 **
## [1] 0.004
m4.r<-round(cor.test.m4$estimate,2)
m4.r2<-round(cor.test.m4$estimate^2,2)
m4.r
## cor
## 0.67
m4.r2
## cor
## 0.44
Do GC cluster numbers and GC% correlate with each other? (yes)
FM.gcc.gcp<-lm(data.mtDNA_Y55nuc_complete$GC_percent~data.mtDNA_Y55nuc_complete$TotalGCcount)
anova(FM.gcc.gcp)
summary(FM.gcc.gcp)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$GC_percent ~ data.mtDNA_Y55nuc_complete$TotalGCcount)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4525 -0.2200 0.1045 0.1472 0.4147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.861832 0.389217 30.48 6.59e-15
## data.mtDNA_Y55nuc_complete$TotalGCcount 0.026224 0.002279 11.51 7.66e-09
##
## (Intercept) ***
## data.mtDNA_Y55nuc_complete$TotalGCcount ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 0.2822 on 15 degrees of freedom
## Multiple R-squared: 0.8982,
## Adjusted R-squared: 0.8915
## F-statistic: 132.4 on 1 and 15 DF, p-value: 7.657e-09
cor.test.gccgcp<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$GC_percent)
cor.test.gccgcp
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$GC_percent
## t = 11.506, df = 15, p-value = 7.657e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8579176 0.9813575
## sample estimates:
## cor
## 0.947753
cor.test.gccgcp$p.value
## [1] 7.657339e-09
#yes, GC cluster numbers correlate with GC percent
Does GC$ and total mtDNAlength correlate? (yes)
FM.gcpl<-lm(data.mtDNA_Y55nuc_complete$Length~data.mtDNA_Y55nuc_complete$GC_percent)
anova(FM.gcpl)
summary(FM.gcpl)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$Length ~ data.mtDNA_Y55nuc_complete$GC_percent)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4612.8 -2319.2 -496.2 983.8 10784.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35915 16714 2.149 0.0484 *
## data.mtDNA_Y55nuc_complete$GC_percent 2817 1026 2.746 0.0150 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 3515 on 15 degrees of freedom
## Multiple R-squared: 0.3345,
## Adjusted R-squared: 0.2901
## F-statistic: 7.54 on 1 and 15 DF, p-value: 0.01501
cor.test.gcpl<-cor.test(data.mtDNA_Y55nuc_complete$GC_percent,data.mtDNA_Y55nuc_complete$Length)
cor.test.gcpl
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$GC_percent and data.mtDNA_Y55nuc_complete$Length
## t = 2.7458, df = 15, p-value = 0.01501
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1353403 0.8286526
## sample estimates:
## cor
## 0.5783623
cor.test.gcpl$p.value
## [1] 0.01501181
#yes, GC percent and mtDNA length correlate
Do GC cluster numbers (total) correlate with total mtDNA length? (yes)
# do the total number of GC clusters correlate with mtDNA length?
FM.gcc.length<-lm(data.mtDNA_Y55nuc_complete$Length~data.mtDNA_Y55nuc_complete$TotalGCcount)
anova(FM.gcc.length)
summary(FM.gcc.length)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$Length ~ data.mtDNA_Y55nuc_complete$TotalGCcount)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4143.8 -1695.7 -462.5 631.3 8530.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65960.88 4262.23 15.476 1.25e-10
## data.mtDNA_Y55nuc_complete$TotalGCcount 93.92 24.96 3.763 0.00188
##
## (Intercept) ***
## data.mtDNA_Y55nuc_complete$TotalGCcount **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 3090 on 15 degrees of freedom
## Multiple R-squared: 0.4856,
## Adjusted R-squared: 0.4513
## F-statistic: 14.16 on 1 and 15 DF, p-value: 0.00188
Anova(FM.gcc.length)
cor.test.gccl<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$Length)
cor.test.gccl
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$Length
## t = 3.763, df = 15, p-value = 0.00188
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3250890 0.8820597
## sample estimates:
## cor
## 0.6968494
cor.test.gccl$p.value
## [1] 0.001880282
#yes, GC% correlates with total mtDNA length
Does total intron length correlate with total mtDNA length? (yes)
FM.ibp.length<-lm(data.mtDNA_Y55nuc_complete$Length~data.mtDNA_Y55nuc_complete$total_intronlength)
anova(FM.ibp.length)
summary(FM.ibp.length)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$Length ~ data.mtDNA_Y55nuc_complete$total_intronlength)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3669.2 -1591.1 241.1 633.3 4658.8
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 6.515e+04 2.381e+03 27.366
## data.mtDNA_Y55nuc_complete$total_intronlength 1.206e+00 1.691e-01 7.131
## Pr(>|t|)
## (Intercept) 3.22e-14 ***
## data.mtDNA_Y55nuc_complete$total_intronlength 3.45e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 2056 on 15 degrees of freedom
## Multiple R-squared: 0.7722,
## Adjusted R-squared: 0.757
## F-statistic: 50.85 on 1 and 15 DF, p-value: 3.446e-06
Anova(FM.ibp.length)
cor.test.ibp<-cor.test(data.mtDNA_Y55nuc_complete$total_intronlength,data.mtDNA_Y55nuc_complete$Length)
cor.test.ibp
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$total_intronlength and data.mtDNA_Y55nuc_complete$Length
## t = 7.1306, df = 15, p-value = 3.446e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6891919 0.9557260
## sample estimates:
## cor
## 0.8787463
cor.test.ibp$p.value
## [1] 3.445509e-06
#yes, intron lengths correlate with total mtDNA length
Does total intron length correlate with total GC cluster count? (no)
FM.ibpgcc<-lm(data.mtDNA_Y55nuc_complete$total_intronlength~data.mtDNA_Y55nuc_complete$TotalGCcount)
anova(FM.ibpgcc)
summary(FM.ibpgcc)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$total_intronlength ~
## data.mtDNA_Y55nuc_complete$TotalGCcount)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4203.3 -1094.0 -476.1 536.8 8572.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6505.79 3890.43 1.672 0.1152
## data.mtDNA_Y55nuc_complete$TotalGCcount 43.21 22.78 1.897 0.0773 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 2820 on 15 degrees of freedom
## Multiple R-squared: 0.1934,
## Adjusted R-squared: 0.1397
## F-statistic: 3.597 on 1 and 15 DF, p-value: 0.0773
cor.test.ibpgcc<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$total_intronlength)
cor.test.ibpgcc
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$total_intronlength
## t = 1.8967, df = 15, p-value = 0.0773
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05178294 0.75983117
## sample estimates:
## cor
## 0.4398083
cor.test.ibpgcc$p.value
## [1] 0.07729987
#no, the total intron length does not correlate with GC cluster count
Does total intron length correlate with total GC percent? (no)
FM.ibpgcp<-lm(data.mtDNA_Y55nuc_complete$total_intronlength~data.mtDNA_Y55nuc_complete$GC_percent)
anova(FM.ibpgcp)
summary(FM.ibpgcp)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$total_intronlength ~
## data.mtDNA_Y55nuc_complete$GC_percent)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3369.4 -1458.6 -822.5 211.3 9813.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3313.0 14267.1 -0.232 0.820
## data.mtDNA_Y55nuc_complete$GC_percent 1049.9 875.7 1.199 0.249
##
## s: 3000 on 15 degrees of freedom
## Multiple R-squared: 0.08745,
## Adjusted R-squared: 0.02661
## F-statistic: 1.437 on 1 and 15 DF, p-value: 0.2492
cor.test.ibpgcp<-cor.test(data.mtDNA_Y55nuc_complete$GC_percent,data.mtDNA_Y55nuc_complete$total_intronlength)
cor.test.ibpgcp
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$GC_percent and data.mtDNA_Y55nuc_complete$total_intronlength
## t = 1.1989, df = 15, p-value = 0.2492
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2155721 0.6797435
## sample estimates:
## cor
## 0.2957125
cor.test.ibpgcp$p.value
## [1] 0.2491614
#no, the total intron length does not correlate with GC cluster count
Do petite frequencies correlate with total GC cluster numbers? (no)
#Pet vs TotalGC cluster
RM.GCcluster<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$TotalGCcount)
anova(RM.GCcluster)
summary(RM.GCcluster)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$TotalGCcount)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1392 -2.8345 -1.3316 0.7767 15.9829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.63806 8.62552 -0.538 0.5987
## data.mtDNA_Y55nuc_complete$TotalGCcount 0.09705 0.05051 1.922 0.0739 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.253 on 15 degrees of freedom
## Multiple R-squared: 0.1975,
## Adjusted R-squared: 0.144
## F-statistic: 3.692 on 1 and 15 DF, p-value: 0.07387
Anova(RM.GCcluster)
cor.test.gcc<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.gcc
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.9216, df = 15, p-value = 0.07387
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.04602562 0.76225970
## sample estimates:
## cor
## 0.4444513
cor.test.gcc$p.value
## [1] 0.07386781
cor.test.gcc.p<-round(cor.test.gcc$p.value,3)
cor.test.gcc.p
## [1] 0.074
#no, petite frequency does not correlate with total GC cluster numbers
Do petite frequencies correlate with total mtDNA length? (no)
#Pet vs. Length
RM.Length<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$Length)
anova(RM.Length)
summary(RM.Length)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$Length)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.039 -3.748 -1.223 2.244 19.543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.7972552 33.9761071 -0.141 0.890
## data.mtDNA_Y55nuc_complete$Length 0.0002015 0.0004151 0.486 0.634
##
## s: 6.926 on 15 degrees of freedom
## Multiple R-squared: 0.01547,
## Adjusted R-squared: -0.05016
## F-statistic: 0.2357 on 1 and 15 DF, p-value: 0.6343
Anova(RM.Length)
cor.test.tbp<-cor.test(data.mtDNA_Y55nuc_complete$Length,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.tbp
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$Length and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 0.48551, df = 15, p-value = 0.6343
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3789134 0.5708984
## sample estimates:
## cor
## 0.1243846
cor.test.tbp$p.value
## [1] 0.6343297
cor.test.tbp.p<-round(cor.test.tbp$p.value,3)
cor.test.tbp.p
## [1] 0.634
#no, petite frequency does not correlate with total mtDNA length
Do petite frequencies correlate with intron lengths? (no)
#Pet vs. Intron Length
RM.intronbp<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$total_intronlength)
anova(RM.intronbp)
summary(RM.intronbp)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$total_intronlength)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.701 -4.601 -1.705 2.306 19.146
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 14.4735530 8.0481879 1.798
## data.mtDNA_Y55nuc_complete$total_intronlength -0.0002030 0.0005715 -0.355
## Pr(>|t|)
## (Intercept) 0.0923 .
## data.mtDNA_Y55nuc_complete$total_intronlength 0.7274
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.951 on 15 degrees of freedom
## Multiple R-squared: 0.008341,
## Adjusted R-squared: -0.05777
## F-statistic: 0.1262 on 1 and 15 DF, p-value: 0.7274
Anova(RM.intronbp)
cor.test.ibppet<-cor.test(data.mtDNA_Y55nuc_complete$total_intronlength,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.ibppet
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$total_intronlength and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -0.35519, df = 15, p-value = 0.7274
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5479205 0.4071916
## sample estimates:
## cor
## -0.09132729
cor.test.ibppet$p.value
## [1] 0.7273897
#no, total intron length does not correlate with petfreq
Do petite frequencies correlate with specific GC cluster types? (only M4 class)
#M1 cluster vs. petfreq_mean
RM.M1<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M1)
anova(RM.M1)
summary(RM.M1)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.029 -2.934 -1.671 1.023 17.761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6043 7.8113 0.333 0.743
## data.mtDNA_Y55nuc_complete$M1 0.2054 0.1730 1.187 0.254
##
## s: 6.674 on 15 degrees of freedom
## Multiple R-squared: 0.08592,
## Adjusted R-squared: 0.02498
## F-statistic: 1.41 on 1 and 15 DF, p-value: 0.2535
Anova(RM.M1)
cor.test.m1<-cor.test(data.mtDNA_Y55nuc_complete$M1,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m1
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M1 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.1874, df = 15, p-value = 0.2535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2182716 0.6782166
## sample estimates:
## cor
## 0.2931252
cor.test.m1$p.value
## [1] 0.253523
cor.test.m1.p<-round(cor.test.m1$p.value,3)
cor.test.m1.p
## [1] 0.254
#not significant
#M2 cluster vs. petfreq_mean
RM.M2<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M2)
anova(RM.M2)
summary(RM.M2)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5515 -3.6054 -1.3358 0.7997 17.5870
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3652 6.2248 0.541 0.597
## data.mtDNA_Y55nuc_complete$M2 0.2969 0.2149 1.382 0.187
##
## s: 6.575 on 15 degrees of freedom
## Multiple R-squared: 0.1129,
## Adjusted R-squared: 0.05374
## F-statistic: 1.909 on 1 and 15 DF, p-value: 0.1873
Anova(RM.M2)
cor.test.m2<-cor.test(data.mtDNA_Y55nuc_complete$M2,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m2
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M2 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.3816, df = 15, p-value = 0.1873
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1725210 0.7030879
## sample estimates:
## cor
## 0.335984
cor.test.m2$p.value
## [1] 0.1873353
cor.test.m2.p<-round(cor.test.m2$p.value,3)
cor.test.m2.p
## [1] 0.187
#not significant
#M3 cluster vs. petfreq_mean
RM.M3<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M3)
anova(RM.M3)
summary(RM.M3)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.681 -3.830 -1.432 1.498 17.082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.4498 8.9812 0.161 0.874
## data.mtDNA_Y55nuc_complete$M3 0.6587 0.5688 1.158 0.265
##
## s: 6.688 on 15 degrees of freedom
## Multiple R-squared: 0.08205,
## Adjusted R-squared: 0.02085
## F-statistic: 1.341 on 1 and 15 DF, p-value: 0.265
Anova(RM.M3)
cor.test.m3<-cor.test(data.mtDNA_Y55nuc_complete$M3,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m3
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M3 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.1579, df = 15, p-value = 0.265
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2252060 0.6742588
## sample estimates:
## cor
## 0.2864447
cor.test.m3$p.value
## [1] 0.2650034
cor.test.m3.p<-round(cor.test.m3$p.value,3)
cor.test.m3.p
## [1] 0.265
#not significant
#M4 vs. pet freq
RM.M4<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M4)
anova(RM.M4)
summary(RM.M4)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.832 -4.068 -1.219 1.880 11.004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.2824 1.6020 5.170 0.000114 ***
## data.mtDNA_Y55nuc_complete$M4 0.5345 0.1549 3.451 0.003568 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 5.212 on 15 degrees of freedom
## Multiple R-squared: 0.4425,
## Adjusted R-squared: 0.4053
## F-statistic: 11.91 on 1 and 15 DF, p-value: 0.003568
Anova(RM.M4)
cor.test.m4<-cor.test(data.mtDNA_Y55nuc_complete$M4,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m4
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M4 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 3.4505, df = 15, p-value = 0.003568
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2713116 0.8682496
## sample estimates:
## cor
## 0.66521
cor.test.m4$p.value
## [1] 0.003568085
cor.test.m4.p<-round(cor.test.m4$p.value,3)
cor.test.m4.p
## [1] 0.004
#significant ** Fig 2B
#M1p vs. petfreq_mean
#RAW
RM.M1p<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M1p)
anova(RM.M1p)
summary(RM.M1p)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M1p)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3068 -2.2128 -0.7875 3.7525 15.9792
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.0221 3.3812 5.33 8.41e-05 ***
## data.mtDNA_Y55nuc_complete$M1p -1.4773 0.7068 -2.09 0.0541 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.143 on 15 degrees of freedom
## Multiple R-squared: 0.2255,
## Adjusted R-squared: 0.1739
## F-statistic: 4.368 on 1 and 15 DF, p-value: 0.05406
Anova(RM.M1p)
cor.test.m1p<-cor.test(data.mtDNA_Y55nuc_complete$M1p,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m1p
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M1p and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -2.09, df = 15, p-value = 0.05406
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.777970978 0.007434686
## sample estimates:
## cor
## -0.4749071
cor.test.m1p$p.value
## [1] 0.05405844
cor.test.m1p.p<-round(cor.test.m1p$p.value,3)
cor.test.m1p.p
## [1] 0.054
#not significant
#M2p cluster vs. petfreq_mean
RM.M2p<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M2p)
anova(RM.M2p)
summary(RM.M2p)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M2p)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.136 -3.557 -1.365 0.624 18.184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1514 5.2752 1.166 0.262
## data.mtDNA_Y55nuc_complete$M2p 0.3196 0.2901 1.101 0.288
##
## s: 6.714 on 15 degrees of freedom
## Multiple R-squared: 0.07483,
## Adjusted R-squared: 0.01316
## F-statistic: 1.213 on 1 and 15 DF, p-value: 0.288
Anova(RM.M2p)
cor.test.m2p<-cor.test(data.mtDNA_Y55nuc_complete$M2p,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m2p
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M2p and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.1015, df = 15, p-value = 0.288
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2384390 0.6665597
## sample estimates:
## cor
## 0.2735565
cor.test.m2p$p.value
## [1] 0.2880426
cor.test.m2p.p<-round(cor.test.m2p$p.value,3)
cor.test.m2p.p
## [1] 0.288
#not significant
#M2pp cluster vs. petfreq_mean
RM.M2pp<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M2pp)
anova(RM.M2pp)
summary(RM.M2pp)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M2pp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.659 -4.026 -2.131 2.438 19.016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.2963 6.4719 2.363 0.032 *
## data.mtDNA_Y55nuc_complete$M2pp -0.5443 0.9405 -0.579 0.571
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.904 on 15 degrees of freedom
## Multiple R-squared: 0.02184,
## Adjusted R-squared: -0.04337
## F-statistic: 0.3349 on 1 and 15 DF, p-value: 0.5714
Anova(RM.M2pp)
cor.test.m2pp<-cor.test(data.mtDNA_Y55nuc_complete$M2pp,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.m2pp
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$M2pp and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -0.57873, df = 15, p-value = 0.5714
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5867527 0.3583099
## sample estimates:
## cor
## -0.1477866
cor.test.m2pp$p.value
## [1] 0.5713597
cor.test.m2pp.p<-round(cor.test.m2pp$p.value,3)
cor.test.m2pp.p
## [1] 0.571
#G cluster vs. petfreq_mean
RM.G<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$G)
anova(RM.G)
summary(RM.G)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$G)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.160 -3.697 -1.655 1.655 19.215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.2204 3.3888 2.721 0.0158 *
## data.mtDNA_Y55nuc_complete$G 0.8705 1.0473 0.831 0.4189
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.825 on 15 degrees of freedom
## Multiple R-squared: 0.04403,
## Adjusted R-squared: -0.0197
## F-statistic: 0.6909 on 1 and 15 DF, p-value: 0.4189
Anova(RM.G)
cor.test.G<-cor.test(data.mtDNA_Y55nuc_complete$G,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.G
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$G and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 0.83122, df = 15, p-value = 0.4189
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3011804 0.6272246
## sample estimates:
## cor
## 0.2098411
cor.test.G$p.value
## [1] 0.418886
cor.test.G.p<-round(cor.test.G$p.value,3)
cor.test.G.p
## [1] 0.419
#not significant
#V cluster vs. petfreq_mean
RM.V<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$V)
anova(RM.V)
summary(RM.V)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$V)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6878 -5.0978 -0.6324 0.5156 17.2823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.675 6.762 2.910 0.0108 *
## data.mtDNA_Y55nuc_complete$V -2.955 2.427 -1.218 0.2421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.659 on 15 degrees of freedom
## Multiple R-squared: 0.08997,
## Adjusted R-squared: 0.02931
## F-statistic: 1.483 on 1 and 15 DF, p-value: 0.2421
Anova(RM.V)
cor.test.V<-cor.test(data.mtDNA_Y55nuc_complete$V,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.V
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$V and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -1.2178, df = 15, p-value = 0.2421
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6822409 0.2111270
## sample estimates:
## cor
## -0.2999565
cor.test.V$p.value
## [1] 0.2421096
cor.test.V.p<-round(cor.test.V$p.value,3)
cor.test.V.p
## [1] 0.242
#not significant
#U cluster vs. petfreq_mean
RM.U<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$U)
anova(RM.U)
summary(RM.U)
##
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$U)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.043 -3.947 0.541 1.907 17.689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -27.9835 19.3727 -1.444 0.1692
## data.mtDNA_Y55nuc_complete$U 0.9843 0.4793 2.053 0.0579 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.167 on 15 degrees of freedom
## Multiple R-squared: 0.2194,
## Adjusted R-squared: 0.1674
## F-statistic: 4.217 on 1 and 15 DF, p-value: 0.0579
Anova(RM.U)
cor.test.U<-cor.test(data.mtDNA_Y55nuc_complete$U,data.mtDNA_Y55nuc_complete$petfreq_mean)
cor.test.U
##
## Pearson's product-moment correlation
##
## data: data.mtDNA_Y55nuc_complete$U and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 2.0534, df = 15, p-value = 0.0579
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01576711 0.77465981
## sample estimates:
## cor
## 0.4684276
cor.test.U$p.value
## [1] 0.05789539
cor.test.U.p<-round(cor.test.U$p.value,3)
cor.test.U.p
## [1] 0.058
#not significant
Boxplot of petite frequencies for strains with their original mtDNAs or the mtDNA from Y55
data.Y55.OG<-read.csv("data.Y55.OG.csv", header = TRUE, stringsAsFactors = FALSE)
str(data.Y55.OG)
## 'data.frame': 71 obs. of 13 variables:
## $ StrainID : chr "ML9x1UB" "ML9x1UB" "ML9x1UB" "ML9x1UB" ...
## $ Nuclear : chr "Y55" "Y55" "Y55" "Y55" ...
## $ NucPop_liti : chr "WestAfrican" "WestAfrican" "WestAfrican" "WestAfrican" ...
## $ Mito : chr "Y55" "Y55" "Y55" "Y55" ...
## $ MitoPop : chr "WestAfrican" "WestAfrican" "WestAfrican" "WestAfrican" ...
## $ Ploidy : chr "Haploid" "Haploid" "Haploid" "Haploid" ...
## $ og_v_syn : chr "original" "original" "original" "original" ...
## $ Genotype : chr "MATxura3arg8::URA3" "MATxura3arg8::URA3" "MATxura3arg8::URA3" "MATxura3arg8::URA3" ...
## $ Petite : int 35 110 294 35 110 294 213 89 263 183 ...
## $ Grande : int 405 774 2864 405 774 2864 1738 1469 2384 1624 ...
## $ Total : int 440 884 3158 440 884 3158 1951 1558 2647 1807 ...
## $ PetFreq : num 7.95 12.44 9.31 7.95 12.44 ...
## $ Concatenated: chr "Y55 Y55" "Y55 Y55" "Y55 Y55" "Y55 Y55" ...
#set factors
data.Y55.OG$StrainID<-as.factor(data.Y55.OG$StrainID)
data.Y55.OG$Nuclear<-as.factor(data.Y55.OG$Nuclear)
data.Y55.OG$NucPop<-as.factor(data.Y55.OG$NucPop)
data.Y55.OG$Mito<-as.factor(data.Y55.OG$Mito)
data.Y55.OG$MitoPop<-as.factor(data.Y55.OG$MitoPop)
data.Y55.OG$og_v_syn<-as.factor(data.Y55.OG$og_v_syn)
data.Y55.OG$Genotype<-as.factor(data.Y55.OG$Genotype)
#set classes
data.Y55.OG$Petite<-as.numeric(data.Y55.OG$Petite)
data.Y55.OG$Grande<-as.numeric(data.Y55.OG$Grande)
data.Y55.OG$Total<-as.numeric(data.Y55.OG$Total)
#subset to compare each nuclear genotype with original vs. Y55 mtDNA
#not elegant but effective
data1<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="Y55")
data2<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="SK1")
data3<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="NCYC110")
data4<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="DBVPG6044")
data5<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="Y12")
data6<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="YPS606")
data7<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="BC187")
data8<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="YJM975")
data9<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="YJM981")
p<-ggplot(data.Y55.OG, aes(x = Nuclear, y = PetFreq, linetype = factor(og_v_syn), fill = factor(NucPop))) +
geom_boxplot(aes(fill = NucPop))+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 8))+
scale_fill_manual(name = "NucPop", values = cbbPalette2)+
scale_alpha_manual(name = "og_v_syn", values = c(1, 0.8)) +
scale_linetype_manual(name = "og_v_syn", values = c("solid", "dotted"))+
xlab("Nuclear genotype")+ ylab("Petite frequency (%)")+
theme(legend.position = "right")+
guides(fill=guide_legend(title="Population"))+
theme(legend.position = "right")
p
#### Fig 2C Stats Is there a difference between the petite frequencies of SK1nuc + SK1mt vs. SK1nuc +Y55mt?
FM.cbind.SK1<-glm(cbind(Petite,Grande)~og_v_syn,data=data2, family=binomial)
anova(FM.cbind.SK1, test="Chisq") #P=0.02 *
Is there a difference between the petite frequencies of NCYC110nuc + NCYC110mt vs. NCYC110nuc +Y55mt?
FM.cbind.NCYC110<-glm(cbind(Petite,Grande)~og_v_syn,data=data3, family=binomial)
anova(FM.cbind.NCYC110, test="Chisq") #P<0.001 ***
Is there a difference between the petite frequencies of DBVPG6044nuc + DBVPG6044mt vs. DBVPG6044nuc +Y55mt?
FM.cbind.DBVPG6044<-glm(cbind(Petite,Grande)~og_v_syn,data=data4, family=binomial)
anova(FM.cbind.DBVPG6044, test="Chisq") #P = 0.144 ns
Is there a difference between the petite frequencies of Y12nuc + Y12mt vs. Y12nuc +Y55mt?
FM.cbind.Y12<-glm(cbind(Petite,Grande)~og_v_syn,data=data5, family=binomial)
anova(FM.cbind.Y12, test="Chisq") #P=0.007 **
Is there a difference between the petite frequencies of YPS606nuc + YPS606mt vs. YPS606nuc +Y55mt?
FM.cbind.YPS606<-glm(cbind(Petite,Grande)~og_v_syn,data=data6, family=binomial)
anova(FM.cbind.YPS606, test="Chisq") #P<0.001 ***
Is there a difference between the petite frequencies of BC187nuc + BC187mt vs. BC187nuc +Y55mt?
FM.cbind.BC187<-glm(cbind(Petite,Grande)~og_v_syn,data=data7, family=binomial)
anova(FM.cbind.BC187, test="Chisq") #P=0.20 ns
Is there a difference between the petite frequencies of YJM975nuc + YJM975mt vs. YJM975nuc +Y55mt?
FM.cbind.YJM975<-glm(cbind(Petite,Grande)~og_v_syn,data=data8, family=binomial)
anova(FM.cbind.YJM975, test="Chisq") #P<0.001 ***
Is there a difference between the petite frequencies of YJM981nuc + YJM981mt vs. YJM981nuc +Y55mt?
FM.cbind.YJM981<-glm(cbind(Petite,Grande)~og_v_syn,data=data9, family=binomial)
anova(FM.cbind.YJM981, test="Chisq") #P<0.001 ***
Analysis of petite frequencies from a 4 nuc x 4 mtDNA mitonuclear collection (16 genotypes). All nuc and mtDNAs have West African origins.
import data
data.4x4<-read.csv("data.4x4mtnWA.csv", header = TRUE, stringsAsFactors = FALSE)
data.4x4$StrainID<-as.factor(data.4x4$StrainID)
data.4x4$Nuclear<-as.factor(data.4x4$Nuclear)
data.4x4$NucPop<-as.factor(data.4x4$NucPop)
data.4x4$Mito<-as.factor(data.4x4$Mito)
data.4x4$MitoPop<-as.factor(data.4x4$MitoPop)
data.4x4$og_v_syn<-as.factor(data.4x4$og_v_syn)
data.4x4$Genotypes<-as.factor(data.4x4$Genotype)
str(data.4x4)
## 'data.frame': 66 obs. of 13 variables:
## $ StrainID : Factor w/ 19 levels "21a1","ML11x1UA",..: 1 1 1 1 1 1 3 3 3 15 ...
## $ Nuclear : Factor w/ 4 levels "DBVPG6044","NCYC110",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ NucPop : Factor w/ 1 level "WestAfrican": 1 1 1 1 1 1 1 1 1 1 ...
## $ Mito : Factor w/ 4 levels "DBVPG6044","NCYC110",..: 1 1 1 1 1 1 1 1 1 2 ...
## $ MitoPop : Factor w/ 1 level "WestAfrican": 1 1 1 1 1 1 1 1 1 1 ...
## $ Ploidy : chr "Haploid" "Haploid" "Haploid" "Haploid" ...
## $ og_v_syn : Factor w/ 2 levels "original","synthetic": 1 1 1 1 1 1 1 1 1 2 ...
## $ Genotype : chr "MATaura3" "MATaura3" "MATaura3" "MATaura3" ...
## $ Petite : int 52 49 70 126 104 146 23 18 77 143 ...
## $ Grande : int 892 721 1005 2152 1835 2336 620 928 870 2087 ...
## $ Total : int 944 770 1075 2278 1939 2482 643 946 947 2230 ...
## $ PetFreq : num 5.51 6.36 6.51 5.53 5.36 5.88 3.58 1.9 8.13 6.41 ...
## $ Genotypes: Factor w/ 3 levels "MATaura3","MATxura3",..: 1 1 1 1 1 1 3 3 3 2 ...
cbbPalette <- c("#000000", "#009E73","#D55E00","#0072B2")
interaction.plot(data.4x4$Mito,data.4x4$Nuclear,data.4x4$PetFreq,
lwd=2.5,
xlab = "mitotype",ylab = "petite frequency (%)",
ylim=c(0,35),trace.label="Nuclear genotype", type = "b", col=c(cbbPalette),
pch = c(15,17,19,6), fixed =TRUE)
Analysis of petite frequencies from a 4 nuc x 4 mtDNA mitonuclear collection (16 genotypes). All nuc and mtDNAs have West African origins.
ANOVA Are there significant nuclear, mtDNA, and mitonuclear interactions in the 4x4 WA collection?
FM.cbind.test<-glm(cbind(Petite,Grande)~Nuclear*Mito,data=data.4x4, family=binomial)
anova(FM.cbind.test, test="Chisq")
Analysis of petite frequencies from a 4 nuc x 4 mtDNA mitonuclear collection (16 genotypes). All nuc and mtDNAs have West African origins. original = strains with their original, coadapted mtDNA synthetic = strains with introduced mtDNAs
cbbPalette3 <- c("original" = "#E69F00","synthetic" ="#999999")
FigS1<-ggplot(data.4x4,aes(x=Nuclear,y=PetFreq,fill=og_v_syn))+geom_boxplot()+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) +
theme(axis.text.x = element_text(size = 10, face="bold")) +scale_fill_manual(values=cbbPalette3)+
theme(axis.title = element_text(size = 12, face="bold"))+
xlab("Nuclear Genotype") + ylab("petite frequency (%)")+
theme(legend.position = c(0.3,0.8),
legend.title=element_text(size=12,face="bold"),
legend.text=element_text(size=10))+
guides(fill=guide_legend(title="Mitonuclear combination"))
FigS1
#### Fig S1 ##### anova comparisons
to calculate P values for each comparison of strains with original or synthetic mtn combinations
subset data
#subset data
data.DB<-subset(data.4x4,data.4x4$Nuclear=="DBVPG6044")
data.NC<-subset(data.4x4,data.4x4$Nuclear=="NCYC110")
data.Y55<-subset(data.4x4,data.4x4$Nuclear=="Y55")
data.SK1<-subset(data.4x4,data.4x4$Nuclear=="SK1")
RM.DB<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.DB, family=binomial)
anova(RM.DB,test="Chisq")
RM.NC<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.NC, family=binomial)
anova(RM.NC,test="Chisq")
RM.Y55<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.Y55, family=binomial)
anova(RM.Y55,test="Chisq")
RM.SK1<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.SK1, family=binomial)
anova(RM.SK1,test="Chisq")
import and prepare data
data.ko<-read.csv("parent_v_ko.csv", header = TRUE, stringsAsFactors = FALSE)
#set factors
data.ko$StrainID<-as.factor(data.ko$StrainID)
data.ko$Nuclear<-as.factor(data.ko$Nuclear)
data.ko$Mito<-as.factor(data.ko$Mito)
data.ko$og_v_syn<-as.factor(data.ko$og_v_syn)
#set classes
data.ko$Petite<-as.numeric(data.ko$Petite)
data.ko$Grande<-as.numeric(data.ko$Grande)
data.ko$Total<-as.numeric(data.ko$Total)
examine petite frequency differences in the parental strain BY4741 vs. knockouts
#first reorder dataset
data.ko$Nuclear<-factor(data.ko$Nuclear,levels=c("parent","est1D","hgh1D","bns1D","smi1D","yat1D"))
Fig6A<-ggplot(data.ko, aes(x=Nuclear, y=Petfreq, fill=Nuclear)) +
geom_violin(trim=FALSE)+theme_bw() +geom_boxplot(width=0.1)+ scale_fill_manual(values=c("#999999", "#D55E00", "#006633","#66CC66","#00FF00","#3399FF"))+theme(legend.position="none")
Fig6A
##### Fig 6A Anova comparisons ##### compare parental strain vs. each knockout
#subset to parent and est1D
data.e<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="est1D")
str(data.e)
## 'data.frame': 40 obs. of 12 variables:
## $ StrainID : Factor w/ 6 levels "BY4741","YAR035W",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Nuclear : Factor w/ 6 levels "parent","est1D",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Mito : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
## $ og_v_syn : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
## $ Assay : chr "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
## $ Plate : chr "5A" "5B" "5C" "5D" ...
## $ Petite : num 8 8 6 3 12 10 8 1 4 4 ...
## $ Grande : num 54 83 45 71 79 45 57 25 24 53 ...
## $ Total : num 62 91 51 74 91 55 65 26 28 57 ...
## $ Petfreq : num 12.9 8.79 11.76 4.05 13.19 ...
## $ Pet.mean : num 12.9 12.9 12.9 12.9 12.9 ...
## $ Pet.median: num 12.6 12.6 12.6 12.6 12.6 ...
wilcox.test(data.e$Petfreq~data.e$Nuclear) #W=383, p=1.75e-08***
##
## Wilcoxon rank sum exact test
##
## data: data.e$Petfreq by data.e$Nuclear
## W = 383, p-value = 1.758e-08
## alternative hypothesis: true location shift is not equal to 0
FM.e<-lm(data.e$Petfreq~data.e$Nuclear)
summary(FM.e)
##
## Call:
## lm(formula = data.e$Petfreq ~ data.e$Nuclear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.6320 -3.6367 -0.6807 1.8520 20.4012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.694 1.095 19.811 < 2e-16 ***
## data.e$Nuclear1 8.762 1.095 8.001 1.13e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.926 on 38 degrees of freedom
## Multiple R-squared: 0.6275,
## Adjusted R-squared: 0.6177
## F-statistic: 64.02 on 1 and 38 DF, p-value: 1.135e-09
anova(FM.e) #p=1.135e-09***
FM.e2<-glm(cbind(data.e$Petite, data.e$Grande)~data.e$Nuclear, family = binomial)
anova(FM.e2, test = "Chisq") #p<2.2e-16 ***
#subset to parent and hgh1
data.h<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="hgh1D")
str(data.h)
## 'data.frame': 37 obs. of 12 variables:
## $ StrainID : Factor w/ 6 levels "BY4741","YAR035W",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Nuclear : Factor w/ 6 levels "parent","est1D",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Mito : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
## $ og_v_syn : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
## $ Assay : chr "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
## $ Plate : chr "2A" "2B" "2C" "2D" ...
## $ Petite : num 30 12 25 15 11 16 5 20 21 13 ...
## $ Grande : num 65 67 71 71 44 66 22 47 46 47 ...
## $ Total : num 95 79 96 86 55 82 27 67 67 60 ...
## $ Petfreq : num 31.6 15.2 26 17.4 20 ...
## $ Pet.mean : num 24 24 24 24 24 ...
## $ Pet.median: num 22.2 22.2 22.2 22.2 22.2 ...
wilcox.test(data.h$Petfreq~data.h$Nuclear)#W=257 p=0.008369** #warning "cannot compute exact p=value with ties"
## Warning in wilcox.test.default(x = c(26.92307692, 25.26315789, 28.84615385, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: data.h$Petfreq by data.h$Nuclear
## W = 257, p-value = 0.008369
## alternative hypothesis: true location shift is not equal to 0
FM.h<-lm(data.h$Petfreq~data.h$Nuclear)
summary(FM.h)
##
## Call:
## lm(formula = data.h$Petfreq ~ data.h$Nuclear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.632 -3.958 -1.609 4.990 17.896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.207 1.094 24.878 < 2e-16 ***
## data.h$Nuclear1 3.249 1.094 2.971 0.00534 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.63 on 35 degrees of freedom
## Multiple R-squared: 0.2014,
## Adjusted R-squared: 0.1786
## F-statistic: 8.826 on 1 and 35 DF, p-value: 0.005338
anova(FM.h) #p=0.005338**
FM.h2<-glm(cbind(data.h$Petite, data.h$Grande)~data.h$Nuclear, family = binomial)
anova(FM.h2, test = "Chisq") #p<0.0001307 ***
#subset to parent and bns1
data.b<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="bns1D")
str(data.b)
## 'data.frame': 40 obs. of 12 variables:
## $ StrainID : Factor w/ 6 levels "BY4741","YAR035W",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ Nuclear : Factor w/ 6 levels "parent","est1D",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Mito : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
## $ og_v_syn : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
## $ Assay : chr "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
## $ Plate : chr "1A" "1B" "1C" "1D" ...
## $ Petite : num 19 23 37 32 18 16 15 15 22 7 ...
## $ Grande : num 31 43 67 59 64 50 49 38 49 27 ...
## $ Total : num 50 66 104 91 82 66 64 53 71 34 ...
## $ Petfreq : num 38 34.8 35.6 35.2 22 ...
## $ Pet.mean : num 29.2 29.2 29.2 29.2 29.2 ...
## $ Pet.median: num 28.4 28.4 28.4 28.4 28.4 ...
wilcox.test(data.b$Petfreq~data.b$Nuclear) #W=217, p=0.6588
##
## Wilcoxon rank sum exact test
##
## data: data.b$Petfreq by data.b$Nuclear
## W = 217, p-value = 0.6588
## alternative hypothesis: true location shift is not equal to 0
FM.b<-lm(data.b$Petfreq~data.b$Nuclear)
summary(FM.b)
##
## Call:
## lm(formula = data.b$Petfreq ~ data.b$Nuclear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.632 -4.655 -1.022 5.433 17.896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.8067 1.0669 27.937 <2e-16 ***
## data.b$Nuclear1 0.6489 1.0669 0.608 0.547
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.748 on 38 degrees of freedom
## Multiple R-squared: 0.009639,
## Adjusted R-squared: -0.01642
## F-statistic: 0.3699 on 1 and 38 DF, p-value: 0.5467
anova(FM.b) #p=0.547 ns
FM.b2<-glm(cbind(data.b$Petite, data.b$Grande)~data.b$Nuclear, family = binomial)
anova(FM.b2, test = "Chisq") #p<0.0.7255 ns
#subset to parent and smi1
data.s<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="smi1D")
str(data.s)
## 'data.frame': 40 obs. of 12 variables:
## $ StrainID : Factor w/ 6 levels "BY4741","YAR035W",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Nuclear : Factor w/ 6 levels "parent","est1D",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Mito : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
## $ og_v_syn : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
## $ Assay : chr "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
## $ Plate : chr "6A" "6B" "6C" "6D" ...
## $ Petite : num 21 24 45 33 24 31 31 12 25 43 ...
## $ Grande : num 57 71 111 70 58 53 49 45 100 101 ...
## $ Total : num 78 95 156 103 82 84 80 57 125 144 ...
## $ Petfreq : num 26.9 25.3 28.8 32 29.3 ...
## $ Pet.mean : num 30.5 30.5 30.5 30.5 30.5 ...
## $ Pet.median: num 29.1 29.1 29.1 29.1 29.1 ...
wilcox.test(data.s$Petfreq~data.s$Nuclear) #W=208, p=0.8392 #warning "cannot compute exact p=value with ties"
## Warning in wilcox.test.default(x = c(26.92307692, 25.26315789, 28.84615385, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: data.s$Petfreq by data.s$Nuclear
## W = 208, p-value = 0.8392
## alternative hypothesis: true location shift is not equal to 0
FM.s<-lm(data.s$Petfreq~data.s$Nuclear)
summary(FM.s)
##
## Call:
## lm(formula = data.s$Petfreq ~ data.s$Nuclear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.247 -5.252 -1.699 5.571 18.143
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.4421 1.2980 23.45 <2e-16 ***
## data.s$Nuclear1 0.0134 1.2980 0.01 0.992
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 8.209 on 38 degrees of freedom
## Multiple R-squared: 2.805e-06,
## Adjusted R-squared: -0.02631
## F-statistic: 0.0001066 on 1 and 38 DF, p-value: 0.9918
anova(FM.s) #p=0.9918 ns
FM.s2<-glm(cbind(data.s$Petite, data.s$Grande)~data.s$Nuclear, family = binomial)
anova(FM.s2, test = "Chisq") #p<0.0.7886 ns
#subset to parent and yat1
data.y<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="yat1D")
str(data.y)
## 'data.frame': 40 obs. of 12 variables:
## $ StrainID : Factor w/ 6 levels "BY4741","YAR035W",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Nuclear : Factor w/ 6 levels "parent","est1D",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Mito : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
## $ og_v_syn : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
## $ Assay : chr "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
## $ Plate : chr "6A" "6B" "6C" "6D" ...
## $ Petite : num 21 24 45 33 24 31 31 12 25 43 ...
## $ Grande : num 57 71 111 70 58 53 49 45 100 101 ...
## $ Total : num 78 95 156 103 82 84 80 57 125 144 ...
## $ Petfreq : num 26.9 25.3 28.8 32 29.3 ...
## $ Pet.mean : num 30.5 30.5 30.5 30.5 30.5 ...
## $ Pet.median: num 29.1 29.1 29.1 29.1 29.1 ...
wilcox.test(data.y$Petfreq~data.y$Nuclear) #W=172 p=0.4569 #warning "cannot compute exact p=value with ties"
## Warning in wilcox.test.default(x = c(26.92307692, 25.26315789, 28.84615385, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: data.y$Petfreq by data.y$Nuclear
## W = 172, p-value = 0.4569
## alternative hypothesis: true location shift is not equal to 0
FM.y<-lm(data.y$Petfreq~data.y$Nuclear)
summary(FM.y)
##
## Call:
## lm(formula = data.y$Petfreq ~ data.y$Nuclear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.791 -3.867 -1.398 4.764 17.896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.3189 1.2608 24.841 <2e-16 ***
## data.y$Nuclear1 -0.8634 1.2608 -0.685 0.498
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 7.974 on 38 degrees of freedom
## Multiple R-squared: 0.01219,
## Adjusted R-squared: -0.0138
## F-statistic: 0.469 on 1 and 38 DF, p-value: 0.4976
anova(FM.y) #p=0.4978
FM.y2<-glm(cbind(data.y$Petite, data.y$Grande)~data.y$Nuclear, family = binomial)
anova(FM.y2, test = "Chisq") #p<0.0.3097 ns
import data
data.ko.mtn<-read.csv("data.ko.phy.mtn2.csv", header = TRUE, stringsAsFactors = FALSE)
data.ko.mtn$Mito<-as.factor(data.ko.mtn$Mito)
data.ko.mtn$Nuclear<-as.factor(data.ko.mtn$Nuclear)
data.ko.mtn$StrainID<-as.factor(data.ko.mtn$StrainID)
data.ko.mtn$og_v_syn<-as.factor(data.ko.mtn$og_v_syn)
data.ko.mtn$Assay<-as.factor(data.ko.mtn$Assay)
data.ko.mtn$Plate<-as.factor(data.ko.mtn$Plate)
interaction plot
interaction.plot(data.ko.mtn$Mito,data.ko.mtn$Nuclear,data.ko.mtn$Pet.mean,
xlab = "Mito",ylab = "Petmean",
ylim=c(0,45),trace.label="nuclear", type = "b", col=c("#D55E00","green","black","blue"),lty=c(1,1,2,1),lwd=2,
pch = c(18,16,17,15),fixed =TRUE)
##### Fig 6B and Table S10 Statistics ##### Table S10 anovas ###### Table S10 anova on full dataset
FM.ko.mtn.cbind<-glm(cbind(data.ko.mtn$Petite, data.ko.mtn$Grande)~data.ko.mtn$Nuclear*data.ko.mtn$Mito, family = binomial)
anova(FM.ko.mtn.cbind, test = "Chisq")
data.ph<-subset(data.ko.mtn,data.ko.mtn$Nuclear=="parent"|data.ko.mtn$Nuclear=="hgh1D")
data.phPN<-subset(data.ph,data.ph$Mito=="BY4741"|data.ph$Mito=="NCYC110")
data.phPY<-subset(data.ph,data.ph$Mito=="BY4741"|data.ph$Mito=="YPS606")
data.py<-subset(data.ko.mtn,data.ko.mtn$Nuclear=="parent"|data.ko.mtn$Nuclear=="yat1D")
data.pyPN<-subset(data.py,data.py$Mito=="BY4741"|data.py$Mito=="NCYC110")
data.pyPY<-subset(data.py,data.py$Mito=="BY4741"|data.py$Mito=="YPS606")
data.pe<-subset(data.ko.mtn,data.ko.mtn$Nuclear=="parent"|data.ko.mtn$Nuclear=="est1D")
data.pePN<-subset(data.pe,data.pe$Mito=="BY4741"|data.pe$Mito=="NCYC110")
data.pePY<-subset(data.pe,data.pe$Mito=="BY4741"|data.pe$Mito=="YPS606")
FM.phPN.cbind<-glm(cbind(data.phPN$Petite, data.phPN$Grande)~data.phPN$Nuclear*data.phPN$Mito, family = binomial)
anova(FM.phPN.cbind, test = "Chisq")
FM.phPY.cbind<-glm(cbind(data.phPY$Petite, data.phPY$Grande)~data.phPY$Nuclear*data.phPY$Mito, family = binomial)
anova(FM.phPY.cbind, test = "Chisq")
FM.pyPN.cbind<-glm(cbind(data.pyPN$Petite, data.pyPN$Grande)~data.pyPN$Nuclear*data.pyPN$Mito, family = binomial)
anova(FM.pyPN.cbind, test = "Chisq")
FM.pyPY.cbind<-glm(cbind(data.pyPY$Petite, data.pyPY$Grande)~data.pyPY$Nuclear*data.pyPY$Mito, family = binomial)
anova(FM.pyPY.cbind, test = "Chisq")
###updated Table S10 ##parental vs. est1∆ ###### mtDNA comparison: BY4741 vs NCYC110
FM.pePN.cbind<-glm(cbind(data.pePN$Petite, data.pePN$Grande)~data.pePN$Nuclear*data.pePN$Mito, family = binomial)
anova(FM.pePN.cbind, test = "Chisq")
FM.pePY.cbind<-glm(cbind(data.pePY$Petite, data.pePY$Grande)~data.pePY$Nuclear*data.pePY$Mito, family = binomial)
anova(FM.pePY.cbind, test = "Chisq")
#import data
data.ko.mtn.bhg<-read.csv("data.ko.mtn.rollerassays_bns1smi1hgh1.csv", header = TRUE, stringsAsFactors = FALSE)
data.ko.mtn.bhg$Mitotype<-as.factor(data.ko.mtn.bhg$Mitotype)
data.ko.mtn.bhg$GeneKO<-as.factor(data.ko.mtn.bhg$GeneKO)
#set classes
data.ko.mtn.bhg$Petite<-as.integer(data.ko.mtn.bhg$Petite)
data.ko.mtn.bhg$Grande<-as.integer(data.ko.mtn.bhg$Grande)
data.ko.mtn.bhg$Total<-as.integer(data.ko.mtn.bhg$Total)
interaction plot
interaction.plot(data.ko.mtn.bhg$Mitotype ,data.ko.mtn.bhg$GeneKO ,data.ko.mtn.bhg$Pet.Mean,
xlab = "Mitotype",ylab = "Mean Petite Frequency",
main = "Fig S5",
ylim=c(0,30),trace.label="Nuclear", type = "b", col=c("green","black","blue","orange"),
pch = c(15,17), fixed =TRUE)
FM.ko.mtn.bhg.cbind<-glm(cbind(data.ko.mtn.bhg$Petite, data.ko.mtn.bhg$Grande)~data.ko.mtn.bhg$GeneKO*data.ko.mtn.bhg$Mitotype, family = binomial)
anova(FM.ko.mtn.bhg.cbind, test = "Chisq")
import data
data.rtpcr<-read.table("Relative Expression.txt")
data.rtpcr$Can_Hap<-as.factor(data.rtpcr$Can_Hap)
data.rtpcr$Mitotype<-as.factor(data.rtpcr$Mitotype)
data.rtpcr$MIP1_Hap<-as.factor(data.rtpcr$MIP1_Hap)
###Fig S6 ### Does expression correlate with petite frequency? ##### Plot
require(ggplot2)
library(ggplot2)
# Gene Expression compared to petite frequency. MPI1 = ALL
BNS1.pf <- ggplot(data.rtpcr, aes(x = BNS1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("BNS1") + xlab("Expression") + ylab("Petite Frequency (%)")
HGH1.pf <- ggplot(data.rtpcr, aes(x = HGH1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("HGH1") + xlab("Expression") + ylab("Petite Frequency (%)")
SMI1.pf <- ggplot(data.rtpcr, aes(x = SMI1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("SMI1") + xlab("Expression") + ylab("Petite Frequency (%)")
YAT1.pf <- ggplot(data.rtpcr, aes(x = YAT1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("YAT1") + xlab("Expression") + ylab("Petite Frequency (%)")
MIP1.pf <- ggplot(data.rtpcr, aes(x = MIP1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("MIP1") + xlab("Expression") + ylab("Petite Frequency (%)")
# combine plots to a single grid
require("gridExtra")
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.pf <- grid.arrange(BNS1.pf, HGH1.pf, SMI1.pf, YAT1.pf, MIP1.pf, nrow = 2, ncol = 3)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
###
# Does expression of MIP1 correlate with petite frequency??
# linear model of correlation between expression and petite frequency in MIP1
lm.mip.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$MIP1_res)
anova.mip.pfr <- anova(lm.mip.pfr)
anova.mip.pfr
# p = 0.1229927, no correlation
summary(lm.mip.pfr) #R2=0.1422,
##
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$MIP1_res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.104 -4.217 -1.482 2.172 14.439
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.276 1.452 4.323 0.000525 ***
## data.rtpcr$MIP1_res 1885.434 1157.926 1.628 0.122993
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.159 on 16 degrees of freedom
## Multiple R-squared: 0.1422,
## Adjusted R-squared: 0.08854
## F-statistic: 2.651 on 1 and 16 DF, p-value: 0.123
cor.test.mip.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$MIP1_res)
cor.test.mip.pfr #cor=r=0.0.3770298
##
## Pearson's product-moment correlation
##
## data: data.rtpcr$Pet_Freq and data.rtpcr$MIP1_res
## t = 1.6283, df = 16, p-value = 0.123
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1090326 0.7175873
## sample estimates:
## cor
## 0.3770298
cor.test.mip.pfr$p.value #p=0.1229927
## [1] 0.1229927
# Does expression of BNS1 correlate with petite frequency?
lm.bns.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$BNS1_res)
anova.bns.pfr <- anova(lm.bns.pfr)
anova.bns.pfr
# p = 0.4434, no correlation
summary(lm.bns.pfr) #R2=0.03717
##
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$BNS1_res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.960 -4.257 -2.573 4.258 16.373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.276 1.538 4.081 0.000871 ***
## data.rtpcr$BNS1_res 1050.590 1336.746 0.786 0.443392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.525 on 16 degrees of freedom
## Multiple R-squared: 0.03717,
## Adjusted R-squared: -0.02301
## F-statistic: 0.6177 on 1 and 16 DF, p-value: 0.4434
cor.test.bns.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$BNS1_res)
cor.test.bns.pfr #cor=r=0.1927965
##
## Pearson's product-moment correlation
##
## data: data.rtpcr$Pet_Freq and data.rtpcr$BNS1_res
## t = 0.78593, df = 16, p-value = 0.4434
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3011834 0.6051926
## sample estimates:
## cor
## 0.1927965
cor.test.bns.pfr$p.value #p=0.4433923
## [1] 0.4433923
#Does expression of HGH1 correlate with petite frequency?
lm.hgh.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$HGH1_res)
anova.hgh.pfr <- anova(lm.hgh.pfr)
anova.hgh.pfr
# p = 0.07751, no correlation :/
summary(lm.hgh.pfr) #R2=0.182,
##
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$HGH1_res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.540 -4.309 -1.267 2.474 13.925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.276 1.418 4.427 0.000423 ***
## data.rtpcr$HGH1_res 1773.321 940.002 1.887 0.077511 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.014 on 16 degrees of freedom
## Multiple R-squared: 0.182,
## Adjusted R-squared: 0.1308
## F-statistic: 3.559 on 1 and 16 DF, p-value: 0.07751
cor.test.hgh.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$HGH1_res)
cor.test.hgh.pfr #cor=r=0.4265
##
## Pearson's product-moment correlation
##
## data: data.rtpcr$Pet_Freq and data.rtpcr$HGH1_res
## t = 1.8865, df = 16, p-value = 0.07751
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05032692 0.74505714
## sample estimates:
## cor
## 0.4265658
cor.test.hgh.pfr$p.value #p=0.0775
## [1] 0.07751052
#Does expression of SMI1 correlate with petite frequency?
lm.smi.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$SMI1_res)
anova.smi.pfr <- anova(lm.smi.pfr)
anova.smi.pfr
# p = 0.2322, no correlation
summary(lm.smi.pfr) #R2=0.08791
##
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$SMI1_res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.147 -3.876 -2.149 3.996 14.976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.276 1.497 4.193 0.000689 ***
## data.rtpcr$SMI1_res 3561.285 2867.811 1.242 0.232202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.351 on 16 degrees of freedom
## Multiple R-squared: 0.08791,
## Adjusted R-squared: 0.0309
## F-statistic: 1.542 on 1 and 16 DF, p-value: 0.2322
cor.test.smi.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$SMI1_res)
cor.test.smi.pfr #cor=r=0.2964936
##
## Pearson's product-moment correlation
##
## data: data.rtpcr$Pet_Freq and data.rtpcr$SMI1_res
## t = 1.2418, df = 16, p-value = 0.2322
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1977498 0.6705443
## sample estimates:
## cor
## 0.2964936
cor.test.smi.pfr$p.value #p=0.2322017
## [1] 0.2322017
#Does expression of YAT1 correlate with petite frequency?
lm.yat.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$YAT1_res)
anova.yat.pfr <- anova(lm.yat.pfr)
anova.yat.pfr
# p = 0.4723, no correlation
summary(lm.yat.pfr) #R2=0.03276
##
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$YAT1_res)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.696 -4.470 -3.240 4.654 18.053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.276 1.541 4.072 0.000888 ***
## data.rtpcr$YAT1_res 449.326 610.341 0.736 0.472272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## s: 6.54 on 16 degrees of freedom
## Multiple R-squared: 0.03276,
## Adjusted R-squared: -0.02769
## F-statistic: 0.542 on 1 and 16 DF, p-value: 0.4723
cor.test.yat.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$YAT1_res)
cor.test.yat.pfr #cor=r=0.0.181
##
## Pearson's product-moment correlation
##
## data: data.rtpcr$Pet_Freq and data.rtpcr$YAT1_res
## t = 0.73619, df = 16, p-value = 0.4723
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3122503 0.5973934
## sample estimates:
## cor
## 0.1810071
cor.test.yat.pfr$p.value #p=0.47227
## [1] 0.4722721
#Fig S7
require(ggplot2)
library(ggplot2)
# Gene Expression in Candidate Haplotypes (DDDA, GAID)
BNS1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = BNS1_res)) + geom_boxplot() + ggtitle("BNS1") + xlab("MtN Hap") + ylab(NULL)
HGH1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = HGH1_res)) + geom_boxplot() + ggtitle("HGH1") + xlab("MtN Hap") + ylab("Normalized Expression (residuals)")
SMI1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = SMI1_res)) + geom_boxplot() + ggtitle("SMI1") + xlab("MtN Hap") + ylab(NULL)
YAT1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = YAT1_res)) + geom_boxplot() + ggtitle("YAT1") + xlab("MtN Hap") + ylab(NULL)
MIP1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = MIP1_res)) + geom_boxplot() + ggtitle("MIP1") + xlab("MtN Hap") + ylab(NULL)
# Gene Expression in MIP1 Haplotypes (GGC, TAT)
BNS1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = BNS1_res)) + geom_boxplot() + ggtitle("BNS1") + xlab("MIP1 Hap") + ylab(NULL)
HGH1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = HGH1_res)) + geom_boxplot() + ggtitle("HGH1") + xlab("MIP1 Hap") + ylab("Normalized Expression (residuals)")
SMI1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = SMI1_res)) + geom_boxplot() + ggtitle("SMI1") + xlab("MIP1 Hap") + ylab(NULL)
YAT1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = YAT1_res)) + geom_boxplot() + ggtitle("YAT1") + xlab("MIP1 Hap") + ylab(NULL)
MIP1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = MIP1_res)) + geom_boxplot() + ggtitle("MIP1") + xlab("MIP1 Hap") + ylab(NULL)
require("gridExtra")
grid.CAN<-grid.arrange(HGH1.Can, BNS1.Can, SMI1.Can, YAT1.Can, MIP1.Can, nrow = 1, ncol = 5)
grid.MIP1.hap <- grid.arrange(HGH1.MIP1_Hap,BNS1.MIP1_Hap,SMI1.MIP1_Hap,YAT1.MIP1_Hap, MIP1.MIP1_Hap, nrow = 1, ncol = 5)
plotcombined<-ggarrange(grid.CAN,grid.MIP1.hap,
labels = c("A", "B"),
ncol = 1, nrow = 2, align = "h")
plotcombined
# Is expression of MIP1 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction?
lm.exp.mip <- lm(data.rtpcr$MIP1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.mip <- anova(lm.exp.mip)
anova.exp.mip
# Is expression of BNS11 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction?
lm.exp.bns <- lm(data.rtpcr$BNS1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.bns <- anova(lm.exp.bns)
anova.exp.bns
# Is expression of HGH11 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction?
lm.exp.hgh <- lm(data.rtpcr$HGH1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.hgh <- anova(lm.exp.hgh)
anova.exp.hgh
# Is expression of SMI1 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction?
lm.exp.smi <- lm(data.rtpcr$SMI1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.smi <- anova(lm.exp.smi)
anova.exp.smi
# Is expression of YAT1 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction?
lm.exp.yat <- lm(data.rtpcr$YAT1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.yat <- anova(lm.exp.yat)
anova.exp.yat
###Figure 7A
#correlations between RC1 and RC2 growth data and petite frequencies
#import data
data.RC12<-read.csv("Pet_growth_RC12.csv")
head(data.RC12)
#does pet frequency correlate with growth data at 30 across the entire RC1 and RC2 collections?
plot(data.RC12$Petitefrequency, data.RC12$SD_Pheno_30)
cor.test(data.RC12$Petitefrequency, data.RC12$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC12$Petitefrequency and data.RC12$SD_Pheno_30
## t = 3.0265, df = 333, p-value = 0.002667
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05746919 0.26610434
## sample estimates:
## cor
## 0.1636157
cols<-c("X273614N"= "grey40","YPS606" ="#009E73")
plotRC12 = ggplot(data.RC12, aes(x=SD_Pheno_30,y= Petitefrequency))+
geom_point(aes(color=Mito),size = 2)+scale_color_manual(values=cols)+
labs(x = "Vmax_30C", y = "Petite Frequency (%)")+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 10))+
theme(legend.position = "none")+
geom_smooth(method = "lm", se = FALSE, color="black")
#Fig7A
plotRC12
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
#is this significance driven by the one datapoint where petfreq>200?
data_subset<-subset(data.RC12,data.RC12$Petitefrequency<20)
cor.test(data_subset$Petitefrequency, data_subset$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data_subset$Petitefrequency and data_subset$SD_Pheno_30
## t = 2.6073, df = 332, p-value = 0.009538
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03486742 0.24523851
## sample estimates:
## cor
## 0.1416519
#is the vmax/petite correlation observed in RC1, RC2 alone?
data.RC1<-subset(data.RC12,data.RC12$Mito == "X273614N")
data.RC2<-subset(data.RC12,data.RC12$Mito == "YPS606")
plot(data.RC1$Petitefrequency, data.RC1$SD_Pheno_30)
plot(data.RC2$Petitefrequency, data.RC2$SD_Pheno_30)
cor.test(data.RC1$Petitefrequency, data.RC1$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC1$Petitefrequency and data.RC1$SD_Pheno_30
## t = 0.85311, df = 176, p-value = 0.3948
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08370201 0.20928205
## sample estimates:
## cor
## 0.06417273
#corr = 0.064 p = 0.39
cor.test(data.RC2$Petitefrequency, data.RC2$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC2$Petitefrequency and data.RC2$SD_Pheno_30
## t = 3.1071, df = 155, p-value = 0.002248
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.08887121 0.38420459
## sample estimates:
## cor
## 0.2421386
#corr = 0.2 p = 0.002
#Fig7B
#The RC contains MIP1 (and other) alleles with strong independent effects on petite frequency. #maybe mitonuclear influenced pet freq correlations with growth will be more evident if the RC is subset into strains that have the high v low MIP1 alleles. #The file Nsnps contains the genotypes for each RC strain at the snps with significant nuclear associations.
#import file and rename first column
Nsnps<-read.csv("N_snps.csv")
names(Nsnps)[1]<-"Strain"
#subset Nsnps to Chr15 alleles and add to dataframe
df.MIP1snps<-Nsnps[,c("Strain","chr15.943237")]
data.mip1<-merge(data.RC12,df.MIP1snps, by="Strain")
#subset to MIP1 T and MIP1C
data.mip1T<-subset(data.mip1, chr15.943237=="T")
data.mip1C<-subset(data.mip1,chr15.943237=="C")
#subset to RC1 and RC2
data.RC1.mip1T<-subset(data.mip1T, Mito=="X273614N")
data.RC2.mip1T<-subset(data.mip1T,Mito=="YPS606")
data.RC1.mip1C<-subset(data.mip1C,Mito=="X273614N")
data.RC2.mip1C<-subset(data.mip1C,Mito=="YPS606")
#correlations
#mip1T
cor.test(data.mip1T$Petitefrequency, data.mip1T$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.mip1T$Petitefrequency and data.mip1T$SD_Pheno_30
## t = 1.2604, df = 175, p-value = 0.2092
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05339938 0.23900518
## sample estimates:
## cor
## 0.0948483
#not correlated
#mip1C
cor.test(data.mip1C$Petitefrequency, data.mip1C$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.mip1C$Petitefrequency and data.mip1C$SD_Pheno_30
## t = 2.6583, df = 156, p-value = 0.008672
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05377874 0.35284259
## sample estimates:
## cor
## 0.208171
#cor = 0.2 p = 0.008672
#RC1 MIP1T
cor.test(data.RC1.mip1T$Petitefrequency, data.RC1.mip1T$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC1.mip1T$Petitefrequency and data.RC1.mip1T$SD_Pheno_30
## t = 0.26131, df = 92, p-value = 0.7944
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1763563 0.2285894
## sample estimates:
## cor
## 0.02723385
#not correlated
#RC1 MIP1C
cor.test(data.RC1.mip1C$Petitefrequency, data.RC1.mip1C$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC1.mip1C$Petitefrequency and data.RC1.mip1C$SD_Pheno_30
## t = 0.22439, df = 82, p-value = 0.823
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1906351 0.2379042
## sample estimates:
## cor
## 0.02477252
#not correlated
#RC2 MIP1T
cor.test(data.RC2.mip1T$Petitefrequency, data.RC2.mip1T$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC2.mip1T$Petitefrequency and data.RC2.mip1T$SD_Pheno_30
## t = 1.1257, df = 81, p-value = 0.2636
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09410133 0.33093751
## sample estimates:
## cor
## 0.1241069
#not correlated
#RC2 MIP1C
cor.test(data.RC2.mip1C$Petitefrequency, data.RC2.mip1C$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC2.mip1C$Petitefrequency and data.RC2.mip1C$SD_Pheno_30
## t = 3.4043, df = 72, p-value = 0.001087
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1572312 0.5537380
## sample estimates:
## cor
## 0.3723524
#corr = 0.37 p = 0.001
#plot
plot.RC2.MIP1C = ggplot(data.RC2.mip1C, aes(x=SD_Pheno_30,y= Petitefrequency))+
geom_point(aes(color=Mito),size = 2)+scale_color_manual(values=cols)+
labs(x = "Vmax_30C", y = "Petite Frequency (%)")+
theme_bw()+
theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
theme(axis.line = element_line(color = 'black'))+
theme(axis.text.x = element_text(size = 10))+
theme(legend.position = "none")+
geom_smooth(method = "lm", se = FALSE, color="black")
#Fig7B
plot.RC2.MIP1C
## `geom_smooth()` using formula 'y ~ x'
#is this still significant if the outlier is removed?
data.RC2.MIP1C_subset<-subset(data.RC2.mip1C,data.RC2.mip1C$Petitefrequency<20)
cor.test(data.RC2.MIP1C_subset$Petitefrequency, data.RC2.MIP1C_subset$SD_Pheno_30)
##
## Pearson's product-moment correlation
##
## data: data.RC2.MIP1C_subset$Petitefrequency and data.RC2.MIP1C_subset$SD_Pheno_30
## t = 3.4043, df = 72, p-value = 0.001087
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1572312 0.5537380
## sample estimates:
## cor
## 0.3723524
#yes, it is R=0.37, p=0.001
#Fig 7C and 7D #emvironmental conditions that alter growth rates also alter petite frequencies #growth rates and petite frequencies were measured for 6 strains in low sugar at 30C, 30C, and 37C
###for fig 7CD
#growth
dfgrowth_full = read.csv("Vmax5_30_37_LS.csv", na.strings="NA",header=T)
# dfgrowth.summary<-ddply(dfgrowth_full,c("Nuclear", "Condition"), summarize, N1 = length(VmaxRaw), Vmaxaverage = mean(VmaxRaw), sd1 = sd(VmaxRaw), se = sd1 / sqrt (N1))
dfgrowth_subset<-subset(dfgrowth_full,Nuclear %in% c("UWOPS05-217.3","L-1528","BC187","YPS128","YPS606","Y12"))
dfgrowth_LSHS<-subset(dfgrowth_subset,Condition %in% c("LS","CSM30"))
dfgrowth_3037<-subset(dfgrowth_subset,Condition %in% c("CSM30","CSM37"))
#reorganize isolates for a certain order
dfgrowth_subset$Nuclear <- factor(dfgrowth_subset$Nuclear, levels=c("Y12","UWOPS05-217.3","YPS606","YPS128","L-1528","BC187"))
dfgrowth_subset$Condition <- factor(dfgrowth_subset$Condition, levels=c("LS","CSM30","CSM37"))
dfgrowth_LSHS$Nuclear<-factor(dfgrowth_LSHS$Nuclear, levels=c("Y12","UWOPS05-217.3","YPS606","YPS128","L-1528","BC187"))
dfgrowth_LSHS$Condition<-factor(dfgrowth_LSHS$Condition, levels=c("LS","CSM30"))
dfgrowth_3037$Nuclear<-factor(dfgrowth_3037$Nuclear, levels=c("Y12","UWOPS05-217.3","YPS606","YPS128","L-1528","BC187"))
dfgrowth_3037$Condition<-factor(dfgrowth_3037$Condition, levels=c("CSM30","CSM37"))
#plots
plot.growth<-ggplot(dfgrowth_subset,aes(x=Nuclear,y=VmaxRaw,fill=Condition))+geom_boxplot()
plot.growth
plot.growth.LSHS<-ggplot(dfgrowth_LSHS,aes(x=Nuclear, y=VmaxRaw,fill=Condition))+geom_boxplot()
plot.growth.LSHS
plot.growth.3037<-ggplot(dfgrowth_3037,aes(x=Nuclear, y=VmaxRaw,fill=Condition))+geom_boxplot()
plot.growth.3037
#petite
dfpet = read.csv("petite_3037LS.csv",na.strings="NA",header=T)
dfpet_LSHS<-subset(dfpet,Condition %in% c("LS30","CSM30"))
dfpet_3037<-subset(dfpet,Condition %in% c("CSM30","CSM37"))
#reorganize isolates for a certain order
dfpet$Strain <- factor(dfpet$Strain, levels=c("Y12","UWOPS052173","YPS606","YPS128","L1528","BC187"))
dfpet$Condition <- factor(dfpet$Condition, levels=c("LS30","CSM30","CSM37"))
dfpet_LSHS$Strain <- factor(dfpet_LSHS$Strain, levels=c("Y12","UWOPS052173","YPS606","YPS128","L1528","BC187"))
dfpet_LSHS$Condition <- factor(dfpet_LSHS$Condition, levels=c("LS30","CSM30","CSM37"))
dfpet_3037$Strain <- factor(dfpet_3037$Strain, levels=c("Y12","UWOPS052173","YPS606","YPS128","L1528","BC187"))
dfpet_3037$Condition <- factor(dfpet_3037$Condition, levels=c("LS30","CSM30","CSM37"))
#quick plots
plot.pet<-ggplot(dfpet,aes(x=Strain,y=Petitefrequency,fill=Condition))+geom_boxplot()
plot.pet
plot.pet.LSHS<-ggplot(dfpet_LSHS,aes(x=Strain,y=Petitefrequency,fill=Condition))+geom_boxplot()
plot.pet.LSHS
plot.pet.3037<-ggplot(dfpet_3037,aes(x=Strain,y=Petitefrequency,fill=Condition))+geom_boxplot()
plot.pet.3037
#shaded plots
#growth... LS CSM30 CSM37
rect_df<-data.frame(Nuclear=unique(dfgrowth_subset$Nuclear))
rect_df$VmaxRaw<-0
head(rect_df)
plot.growth<-ggplot(dfgrowth_subset,aes(x=Nuclear, y=VmaxRaw,fill=Condition))+geom_boxplot()
plot.growth
plot.vmax.shaded <- ggplot(dfgrowth_subset,aes(factor(Nuclear),VmaxRaw, fill = Condition))+
geom_rect(data=rect_df,
aes(fill=factor(Nuclear)), alpha=0.3,
xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)+
scale_fill_manual(values=c("BC187"= "#56B4E9","L-1528"= "#56B4E9","UWOPS05-217.3" ="#CC79A7", "YPS606"="#009E73", "YPS128"="#009E73", "Y12" = "#D55E00","LS"="grey100","CSM30"="grey75","CSM37"="grey50"))+
geom_boxplot()+ theme(legend.position = "none",axis.ticks.x=element_blank(), axis.text.x=element_blank())+xlab("")+ ylab("Vmax (arbitrary units)")+
facet_grid(~factor(Nuclear), scales='free_x')
#Fig7C
plot.vmax.shaded
#petite LS 30 37
rect_df2<-data.frame(Strain=unique(dfpet$Strain))
rect_df2$Petitefrequency<-0
head(rect_df2)
plot.pet<-ggplot(dfpet,aes(x=Strain, y=Petitefrequency,fill=Condition))+geom_boxplot()
plot.pet
plot.pet.shaded <- ggplot(dfpet,aes(factor(Strain),Petitefrequency, fill = Condition))+
geom_rect(data=rect_df2,
aes(fill=factor(Strain)), alpha=0.3,
xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)+
scale_fill_manual(values=c("BC187"= "#56B4E9","L1528"= "#56B4E9","UWOPS052173" ="#CC79A7", "YPS606"="#009E73", "YPS128"="#009E73", "Y12" = "#D55E00","LS30"="grey100","CSM30"="grey75","CSM37"="grey50"))+
geom_boxplot()+ theme(legend.position = "none",axis.ticks.x=element_blank(), axis.text.x=element_blank())+xlab("low sugar (white), 30C (light gray), 37C(dark gray)")+ ylab("Petite frequency (%)")+
facet_grid(~factor(Strain), scales='free_x')
#Fig7D
plot.pet.shaded